# Data Analysis Phase
## MAin aim is to understand more about the data
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
import scipy.stats as stats
import seaborn as sns
## Display all the columns of the dataframe
pd.pandas.set_option('display.max_columns',None)
first_data=pd.read_csv(r"C:\Users\Bilal\Desktop\AML Assignment\world_bank_development_indicators.csv")
df=first_data.copy()
## print shape of dataset with rows and columns
print(df.shape)
(16780, 50)
# Handling missing values
# numeric_columns = df.select_dtypes(include=['number']).columns
# numeric_columns = [col for col in numeric_columns if col != 'country' and col !='date']
# for column in numeric_columns:
# median_value = df[column].median()
# df[column].fillna(median_value, inplace=True)
df
| country | date | agricultural_land% | forest_land% | land_area | avg_precipitation | trade_in_services% | control_of_corruption_estimate | control_of_corruption_std | access_to_electricity% | renewvable_energy_consumption% | electric_power_consumption | CO2_emisions | other_greenhouse_emisions | population_density | inflation_annual% | real_interest_rate | risk_premium_on_lending | research_and_development_expenditure% | central_goverment_debt% | tax_revenue% | expense% | goverment_effectiveness_estimate | goverment_effectiveness_std | human_capital_index | doing_business | time_to_get_operation_license | statistical_performance_indicators | individuals_using_internet% | logistic_performance_index | military_expenditure% | GDP_current_US | political_stability_estimate | political_stability_std | rule_of_law_estimate | rule_of_law_std | regulatory_quality_estimate | regulatory_quality_std | government_expenditure_on_education% | government_health_expenditure% | multidimensional_poverty_headcount_ratio% | gini_index | birth_rate | death_rate | life_expectancy_at_birth | population | rural_population | voice_and_accountability_estimate | voice_and_accountability_std | intentional_homicides | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | Afghanistan | 1/1/1960 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | 537777811.10 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | 50.34 | 31.92 | 32.53 | 8622466.00 | 7898093.00 | NaN | NaN | NaN |
| 1 | Afghanistan | 1/1/1961 | 57.80 | NaN | 652230.00 | 327.00 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | 13.48 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | 548888895.60 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | 50.44 | 31.35 | 33.07 | 8790140.00 | 8026804.00 | NaN | NaN | NaN |
| 2 | Afghanistan | 1/1/1962 | 57.89 | NaN | 652230.00 | 327.00 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | 13.75 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | 546666677.80 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | 50.57 | 30.84 | 33.55 | 8969047.00 | 8163985.00 | NaN | NaN | NaN |
| 3 | Afghanistan | 1/1/1963 | 57.97 | NaN | 652230.00 | 327.00 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | 14.04 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | 751111191.10 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | 50.70 | 30.36 | 34.02 | 9157465.00 | 8308019.00 | NaN | NaN | NaN |
| 4 | Afghanistan | 1/1/1964 | 58.07 | NaN | 652230.00 | 327.00 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | 14.34 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | 800000044.40 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | 50.83 | 29.87 | 34.49 | 9355514.00 | 8458694.00 | NaN | NaN | NaN |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 16775 | Zimbabwe | 1/1/2018 | 41.88 | 45.33 | 386850.00 | 657.00 | 4.47 | -1.23 | 0.13 | 45.40 | 80.43 | NaN | 11069.90 | 29374.29 | 38.91 | 10.62 | -64.38 | NaN | NaN | NaN | 7.21 | 10.98 | -1.26 | 0.17 | 0.46 | NaN | NaN | 59.51 | 25.00 | 2.12 | 0.31 | 34156069918.00 | -0.72 | 0.22 | -1.28 | 0.14 | -1.51 | 0.16 | 3.87 | 1.59 | NaN | NaN | 32.07 | 7.97 | 61.41 | 15052184.00 | 10204026.00 | -1.14 | 0.12 | 4.88 |
| 16776 | Zimbabwe | 1/1/2019 | 41.88 | 45.21 | 386850.00 | 657.00 | 6.93 | -1.27 | 0.14 | 46.68 | 81.52 | NaN | 10185.30 | 28697.16 | 39.69 | 255.30 | -64.30 | NaN | NaN | NaN | NaN | NaN | -1.27 | 0.18 | NaN | 140.00 | NaN | 61.65 | 26.59 | NaN | 0.53 | 21832234926.00 | -0.96 | 0.23 | -1.29 | 0.14 | -1.47 | 0.16 | NaN | 0.52 | NaN | 50.30 | 31.52 | 8.04 | 61.29 | 15354608.00 | 10408889.00 | -1.16 | 0.12 | 5.15 |
| 16777 | Zimbabwe | 1/1/2020 | 41.88 | 45.09 | 386850.00 | 657.00 | 5.12 | -1.29 | 0.14 | 52.75 | 84.36 | NaN | 8312.50 | 25988.13 | 40.51 | 557.20 | -81.13 | NaN | NaN | NaN | NaN | NaN | -1.30 | 0.21 | 0.47 | NaN | NaN | NaN | 29.30 | NaN | 0.01 | 21509698406.00 | -1.07 | 0.24 | -1.31 | 0.14 | -1.42 | 0.17 | NaN | 0.76 | NaN | NaN | 31.01 | 8.13 | 61.12 | 15669666.00 | 10617452.00 | -1.11 | 0.12 | 4.98 |
| 16778 | Zimbabwe | 1/1/2021 | NaN | NaN | NaN | NaN | NaN | -1.26 | 0.15 | 48.98 | NaN | NaN | NaN | NaN | NaN | 98.55 | -31.80 | NaN | NaN | NaN | NaN | NaN | -1.24 | 0.21 | NaN | NaN | NaN | NaN | 34.81 | NaN | 0.01 | 28371238666.00 | -1.03 | 0.24 | -1.26 | 0.15 | -1.37 | 0.16 | NaN | NaN | NaN | NaN | 30.54 | 9.06 | 59.25 | 15993524.00 | 10827136.00 | -1.14 | 0.12 | 6.14 |
| 16779 | Zimbabwe | 1/1/2022 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | 104.71 | -18.32 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | 2.50 | NaN | 20678055598.00 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | 16320537.00 | 11033499.00 | NaN | NaN | NaN |
16780 rows × 50 columns
dataset=df.copy()
features_with_na=[features for features in dataset.columns if dataset[features].isnull().sum()>1]
for feature in features_with_na:
print(feature, np.round(dataset[feature].isnull().mean(), 2), '\b% missing values')
print()
print()
print()
features_with_nan = [feature for feature in dataset.columns if dataset[feature].isna().sum() > 1]
for feature in features_with_nan:
print(f"{feature}: {np.round(dataset[feature].isna().mean() * 100, 2)}% NaN values")
agricultural_land% 0.09 % missing values forest_land% 0.53 % missing values land_area 0.07 % missing values avg_precipitation 0.4 % missing values trade_in_services% 0.45 % missing values control_of_corruption_estimate 0.73 % missing values control_of_corruption_std 0.73 % missing values access_to_electricity% 0.56 % missing values renewvable_energy_consumption% 0.52 % missing values electric_power_consumption 0.54 % missing values CO2_emisions 0.56 % missing values other_greenhouse_emisions 0.56 % missing values population_density 0.07 % missing values inflation_annual% 0.36 % missing values real_interest_rate 0.74 % missing values risk_premium_on_lending 0.86 % missing values research_and_development_expenditure% 0.83 % missing values central_goverment_debt% 0.88 % missing values tax_revenue% 0.7 % missing values expense% 0.72 % missing values goverment_effectiveness_estimate 0.73 % missing values goverment_effectiveness_std 0.73 % missing values human_capital_index 0.96 % missing values doing_business 0.99 % missing values time_to_get_operation_license 0.98 % missing values statistical_performance_indicators 0.96 % missing values individuals_using_internet% 0.52 % missing values logistic_performance_index 0.92 % missing values military_expenditure% 0.41 % missing values GDP_current_US 0.2 % missing values political_stability_estimate 0.72 % missing values political_stability_std 0.72 % missing values rule_of_law_estimate 0.72 % missing values rule_of_law_std 0.72 % missing values regulatory_quality_estimate 0.73 % missing values regulatory_quality_std 0.73 % missing values government_expenditure_on_education% 0.65 % missing values government_health_expenditure% 0.71 % missing values multidimensional_poverty_headcount_ratio% 0.97 % missing values gini_index 0.88 % missing values birth_rate 0.04 % missing values death_rate 0.05 % missing values life_expectancy_at_birth 0.05 % missing values population 0.01 % missing values rural_population 0.01 % missing values voice_and_accountability_estimate 0.72 % missing values voice_and_accountability_std 0.72 % missing values intentional_homicides 0.75 % missing values agricultural_land%: 8.83% NaN values forest_land%: 52.84% NaN values land_area: 6.98% NaN values avg_precipitation: 39.89% NaN values trade_in_services%: 45.38% NaN values control_of_corruption_estimate: 72.8% NaN values control_of_corruption_std: 72.8% NaN values access_to_electricity%: 56.21% NaN values renewvable_energy_consumption%: 51.87% NaN values electric_power_consumption: 53.58% NaN values CO2_emisions: 55.85% NaN values other_greenhouse_emisions: 55.85% NaN values population_density: 7.16% NaN values inflation_annual%: 35.79% NaN values real_interest_rate: 73.8% NaN values risk_premium_on_lending: 85.89% NaN values research_and_development_expenditure%: 83.46% NaN values central_goverment_debt%: 87.67% NaN values tax_revenue%: 69.73% NaN values expense%: 71.79% NaN values goverment_effectiveness_estimate: 72.94% NaN values goverment_effectiveness_std: 72.94% NaN values human_capital_index: 96.42% NaN values doing_business: 98.87% NaN values time_to_get_operation_license: 97.84% NaN values statistical_performance_indicators: 95.89% NaN values individuals_using_internet%: 52.47% NaN values logistic_performance_index: 91.62% NaN values military_expenditure%: 40.9% NaN values GDP_current_US: 20.35% NaN values political_stability_estimate: 72.5% NaN values political_stability_std: 72.5% NaN values rule_of_law_estimate: 72.26% NaN values rule_of_law_std: 72.26% NaN values regulatory_quality_estimate: 72.93% NaN values regulatory_quality_std: 72.93% NaN values government_expenditure_on_education%: 64.54% NaN values government_health_expenditure%: 70.57% NaN values multidimensional_poverty_headcount_ratio%: 97.06% NaN values gini_index: 88.46% NaN values birth_rate: 4.43% NaN values death_rate: 4.54% NaN values life_expectancy_at_birth: 5.45% NaN values population: 0.69% NaN values rural_population: 1.44% NaN values voice_and_accountability_estimate: 72.3% NaN values voice_and_accountability_std: 72.3% NaN values intentional_homicides: 74.92% NaN values
CountryWiseData=dataset.groupby('country').mean()
CountryWiseData
| agricultural_land% | forest_land% | land_area | avg_precipitation | trade_in_services% | control_of_corruption_estimate | control_of_corruption_std | access_to_electricity% | renewvable_energy_consumption% | electric_power_consumption | CO2_emisions | other_greenhouse_emisions | population_density | inflation_annual% | real_interest_rate | risk_premium_on_lending | research_and_development_expenditure% | central_goverment_debt% | tax_revenue% | expense% | goverment_effectiveness_estimate | goverment_effectiveness_std | human_capital_index | doing_business | time_to_get_operation_license | statistical_performance_indicators | individuals_using_internet% | logistic_performance_index | military_expenditure% | GDP_current_US | political_stability_estimate | political_stability_std | rule_of_law_estimate | rule_of_law_std | regulatory_quality_estimate | regulatory_quality_std | government_expenditure_on_education% | government_health_expenditure% | multidimensional_poverty_headcount_ratio% | gini_index | birth_rate | death_rate | life_expectancy_at_birth | population | rural_population | voice_and_accountability_estimate | voice_and_accountability_std | intentional_homicides | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| country | ||||||||||||||||||||||||||||||||||||||||||||||||
| Afghanistan | 58.18 | 1.85 | 652230.00 | 327.00 | 12.65 | -1.43 | 0.22 | 55.82 | 27.04 | NaN | 4821.06 | 20799.12 | 27.34 | 6.12 | 10.00 | NaN | NaN | NaN | 7.80 | 41.62 | -1.50 | 0.26 | 0.39 | 173.00 | 13.75 | 44.85 | 4.76 | 1.97 | 1.65 | 7717726754.86 | -2.48 | 0.30 | -1.72 | 0.21 | -1.50 | 0.23 | 2.69 | 0.54 | 50.55 | NaN | 47.67 | 18.23 | 47.92 | 18410104.44 | 14513617.44 | -1.30 | 0.16 | 6.02 |
| Africa Eastern and Southern | 43.69 | 33.45 | 14632485.85 | NaN | 9.83 | NaN | NaN | 30.99 | 63.33 | 716.08 | 452640.56 | 1116224.04 | 23.42 | 10.38 | NaN | NaN | 0.62 | NaN | 17.78 | 24.07 | NaN | NaN | NaN | NaN | 13.24 | NaN | 6.43 | 2.48 | 2.52 | 366730373780.84 | NaN | NaN | NaN | NaN | NaN | NaN | 4.39 | 2.55 | NaN | NaN | 42.79 | 14.90 | 51.89 | 351919799.95 | 251054684.00 | NaN | NaN | 11.97 |
| Africa Western and Central | 35.83 | 21.19 | 9045959.88 | NaN | 9.66 | NaN | NaN | 41.27 | 81.26 | 127.28 | 153913.71 | 643892.33 | 25.87 | 4.64 | NaN | NaN | 0.15 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | 32.29 | NaN | 9.42 | 2.41 | 1.79 | 258364273935.03 | NaN | NaN | NaN | NaN | NaN | NaN | 2.82 | 0.80 | NaN | NaN | 44.23 | 17.59 | 48.33 | 239756352.51 | 153870196.38 | NaN | NaN | 9.83 |
| Albania | 42.15 | 28.50 | 27400.00 | 1485.00 | 23.31 | -0.68 | 0.17 | 99.80 | 40.24 | 1167.18 | 3881.95 | 8614.21 | 99.59 | 15.06 | 2.44 | 6.20 | 0.12 | 66.81 | 16.43 | 24.20 | -0.31 | 0.23 | 0.61 | 82.00 | 14.77 | 71.14 | 25.38 | 2.48 | 2.79 | 7480345027.69 | -0.09 | 0.27 | -0.55 | 0.17 | 0.02 | 0.21 | 3.37 | 2.62 | 47.60 | 31.01 | 22.85 | 7.73 | 71.50 | 2713063.16 | 1574154.24 | 0.03 | 0.15 | 6.40 |
| Algeria | 17.45 | 0.74 | 2381740.10 | 89.00 | 6.83 | -0.64 | 0.18 | 99.06 | 0.32 | 599.61 | 105761.24 | 193574.39 | 10.72 | 8.60 | 1.90 | 5.82 | 0.26 | NaN | NaN | NaN | -0.56 | 0.21 | 0.53 | 157.00 | 19.30 | 48.79 | 16.35 | 2.46 | 3.15 | 71794434279.38 | -1.23 | 0.26 | -0.84 | 0.17 | -0.96 | 0.23 | 6.45 | 3.52 | NaN | 34.37 | 33.52 | 10.73 | 61.42 | 25915130.46 | 10791909.30 | -0.98 | 0.14 | 1.17 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| West Bank and Gaza | 79.33 | 1.59 | 6020.00 | 402.00 | 16.51 | -0.26 | 0.28 | 99.54 | 17.25 | NaN | NaN | NaN | 558.20 | 2.96 | 2.84 | NaN | 0.30 | NaN | 8.36 | 12.43 | -0.82 | 0.33 | 0.57 | 117.00 | 16.77 | 67.16 | 27.35 | NaN | NaN | 9172903448.28 | -1.77 | 0.32 | -0.37 | 0.27 | -0.38 | 0.35 | 5.11 | NaN | 24.00 | 34.52 | 37.09 | 4.29 | 71.79 | 3458736.48 | 907983.03 | -0.87 | 0.19 | 1.35 |
| World | 36.36 | 31.78 | 129717210.21 | NaN | 9.53 | NaN | NaN | 83.60 | 17.34 | 2104.54 | 28173346.98 | 38928765.52 | 41.34 | 5.36 | NaN | NaN | 2.06 | NaN | 14.19 | 26.53 | NaN | NaN | NaN | NaN | 31.47 | NaN | 21.50 | 2.87 | 3.47 | 32366182539682.54 | NaN | NaN | NaN | NaN | NaN | NaN | 4.15 | 5.66 | NaN | NaN | 25.52 | 9.89 | 64.60 | 5407739439.19 | 2920233053.79 | NaN | NaN | 6.17 |
| Yemen, Rep. | 44.52 | 1.04 | 527970.00 | 167.00 | 10.41 | -1.22 | 0.18 | 56.89 | 1.37 | 114.12 | 15957.08 | 33225.43 | 29.21 | 17.50 | 7.13 | 4.18 | NaN | NaN | NaN | NaN | -1.33 | 0.22 | 0.37 | 187.00 | 6.75 | 39.09 | 6.75 | 2.40 | 5.63 | 18230152874.03 | -2.11 | 0.26 | -1.37 | 0.17 | -1.00 | 0.21 | 6.49 | 1.56 | NaN | 35.47 | 46.35 | 14.83 | 54.78 | 15833245.87 | 11515379.17 | -1.29 | 0.14 | 4.59 |
| Zambia | 28.79 | 62.69 | 743390.00 | 1020.00 | 11.79 | -0.56 | 0.16 | 24.77 | 86.38 | 843.76 | 3320.69 | 28515.98 | 11.84 | 36.44 | -0.49 | 3.77 | 0.05 | 105.35 | 15.50 | 18.25 | -0.80 | 0.18 | 0.39 | 85.00 | 30.63 | 57.93 | 4.26 | 2.41 | 1.87 | 8035327954.94 | 0.21 | 0.26 | -0.43 | 0.16 | -0.55 | 0.19 | 3.79 | 1.83 | NaN | 52.69 | 45.67 | 13.65 | 52.34 | 9055472.62 | 5573550.30 | -0.29 | 0.13 | 7.39 |
| Zimbabwe | 35.50 | 46.88 | 386850.00 | 657.58 | 9.63 | -1.23 | 0.16 | 37.57 | 75.06 | 842.78 | 12378.40 | 29168.66 | 24.68 | 79.61 | -20.57 | NaN | NaN | NaN | 14.83 | 18.91 | -1.17 | 0.18 | 0.44 | 140.00 | 7.00 | 53.54 | 8.33 | 2.31 | 3.25 | 8333485469.06 | -0.95 | 0.26 | -1.50 | 0.15 | -1.70 | 0.18 | 9.08 | 1.75 | NaN | 45.93 | 39.59 | 11.92 | 54.74 | 9667335.21 | 6857547.03 | -1.30 | 0.13 | 8.34 |
268 rows × 48 columns
# Selecting columns which have only numerical data
numerical_columns = dataset.select_dtypes(include=[np.number])
non_numerical_columns = dataset.select_dtypes(exclude=[np.number])
print("The Columns which have numerical values are")
numerical_columns.columns.tolist()
The Columns which have numerical values are
['agricultural_land%', 'forest_land%', 'land_area', 'avg_precipitation', 'trade_in_services%', 'control_of_corruption_estimate', 'control_of_corruption_std', 'access_to_electricity%', 'renewvable_energy_consumption%', 'electric_power_consumption', 'CO2_emisions', 'other_greenhouse_emisions', 'population_density', 'inflation_annual%', 'real_interest_rate', 'risk_premium_on_lending', 'research_and_development_expenditure%', 'central_goverment_debt%', 'tax_revenue%', 'expense%', 'goverment_effectiveness_estimate', 'goverment_effectiveness_std', 'human_capital_index', 'doing_business', 'time_to_get_operation_license', 'statistical_performance_indicators', 'individuals_using_internet%', 'logistic_performance_index', 'military_expenditure%', 'GDP_current_US', 'political_stability_estimate', 'political_stability_std', 'rule_of_law_estimate', 'rule_of_law_std', 'regulatory_quality_estimate', 'regulatory_quality_std', 'government_expenditure_on_education%', 'government_health_expenditure%', 'multidimensional_poverty_headcount_ratio%', 'gini_index', 'birth_rate', 'death_rate', 'life_expectancy_at_birth', 'population', 'rural_population', 'voice_and_accountability_estimate', 'voice_and_accountability_std', 'intentional_homicides']
print("The Columns which have non-numerical values are")
non_numerical_columns.columns.tolist()
The Columns which have non-numerical values are
['country', 'date']
pd.set_option('display.float_format', lambda x: '%.2f' % x)
summary_stats = pd.DataFrame({
'Mean': numerical_columns.mean(),
'Median': numerical_columns.median(),
'Std Dev': numerical_columns.std(),
'Min': numerical_columns.min(),
'Max': numerical_columns.max()
})
summary_stats
| Mean | Median | Std Dev | Min | Max | |
|---|---|---|---|---|---|
| agricultural_land% | 37.53 | 37.69 | 20.54 | 0.26 | 93.44 |
| forest_land% | 32.43 | 30.77 | 23.38 | 0.00 | 98.57 |
| land_area | 5250932.58 | 199810.00 | 15098866.06 | 2.03 | 129987020.90 |
| avg_precipitation | 1214.02 | 1110.00 | 810.57 | 18.10 | 3240.00 |
| trade_in_services% | 20.82 | 13.32 | 23.10 | 0.62 | 316.32 |
| control_of_corruption_estimate | -0.03 | -0.26 | 1.00 | -1.92 | 2.46 |
| control_of_corruption_std | 0.21 | 0.17 | 0.10 | 0.11 | 0.99 |
| access_to_electricity% | 80.76 | 98.29 | 28.75 | 0.53 | 100.00 |
| renewvable_energy_consumption% | 31.03 | 20.99 | 29.86 | 0.00 | 98.34 |
| electric_power_consumption | 2885.32 | 1331.11 | 4103.21 | 5.55 | 54799.17 |
| CO2_emisions | 1023985.81 | 23834.75 | 3343747.34 | 0.00 | 35560555.79 |
| other_greenhouse_emisions | 1479214.81 | 50372.71 | 4595983.69 | 7.62 | 48089616.91 |
| population_density | 277.14 | 51.72 | 1447.28 | 0.10 | 21594.80 |
| inflation_annual% | 20.03 | 4.91 | 291.46 | -17.64 | 23773.13 |
| real_interest_rate | 5.43 | 5.48 | 15.60 | -97.69 | 628.32 |
| risk_premium_on_lending | 5.92 | 4.61 | 7.14 | -31.50 | 67.84 |
| research_and_development_expenditure% | 1.04 | 0.73 | 0.92 | 0.01 | 5.44 |
| central_goverment_debt% | 64.28 | 51.28 | 83.58 | -1.17 | 2002.51 |
| tax_revenue% | 17.27 | 15.62 | 12.59 | 0.00 | 177.28 |
| expense% | 28.36 | 25.65 | 25.23 | 0.00 | 378.49 |
| goverment_effectiveness_estimate | -0.03 | -0.18 | 0.99 | -2.45 | 2.43 |
| goverment_effectiveness_std | 0.24 | 0.22 | 0.08 | 0.16 | 1.06 |
| human_capital_index | 0.57 | 0.57 | 0.15 | 0.29 | 0.89 |
| doing_business | 95.93 | 96.00 | 54.82 | 1.00 | 190.00 |
| time_to_get_operation_license | 31.25 | 22.30 | 29.16 | 1.20 | 176.10 |
| statistical_performance_indicators | 61.11 | 59.58 | 17.67 | 11.77 | 90.29 |
| individuals_using_internet% | 23.97 | 8.00 | 29.39 | 0.00 | 100.00 |
| logistic_performance_index | 2.84 | 2.69 | 0.55 | 1.21 | 4.30 |
| military_expenditure% | 2.80 | 2.08 | 2.89 | 0.00 | 117.35 |
| GDP_current_US | 1206984758444.87 | 18427777778.00 | 5412748059129.95 | 8824746.24 | 100562000000000.00 |
| political_stability_estimate | -0.02 | 0.07 | 1.00 | -3.31 | 1.97 |
| political_stability_std | 0.28 | 0.25 | 0.08 | 0.19 | 0.66 |
| rule_of_law_estimate | -0.03 | -0.17 | 1.00 | -2.59 | 2.12 |
| rule_of_law_std | 0.21 | 0.17 | 0.11 | 0.12 | 0.92 |
| regulatory_quality_estimate | -0.03 | -0.15 | 0.99 | -2.55 | 2.26 |
| regulatory_quality_std | 0.24 | 0.21 | 0.08 | 0.15 | 1.08 |
| government_expenditure_on_education% | 4.33 | 4.13 | 1.93 | 0.00 | 44.33 |
| government_health_expenditure% | 3.23 | 2.64 | 2.27 | 0.06 | 22.25 |
| multidimensional_poverty_headcount_ratio% | 26.94 | 24.60 | 11.23 | 2.37 | 74.20 |
| gini_index | 37.97 | 35.80 | 8.96 | 20.70 | 65.80 |
| birth_rate | 28.20 | 27.07 | 12.86 | 5.00 | 58.12 |
| death_rate | 10.49 | 9.20 | 5.36 | 0.80 | 103.53 |
| life_expectancy_at_birth | 64.25 | 66.78 | 11.11 | 11.99 | 85.50 |
| population | 215965715.82 | 6787419.00 | 710295606.01 | 2646.00 | 7951149546.00 |
| rural_population | 123097056.33 | 3148533.00 | 408755719.95 | 0.00 | 3435440919.00 |
| voice_and_accountability_estimate | -0.02 | 0.02 | 1.00 | -2.31 | 1.80 |
| voice_and_accountability_std | 0.17 | 0.14 | 0.07 | 0.10 | 0.73 |
| intentional_homicides | 8.04 | 3.43 | 12.03 | 0.00 | 138.77 |
for column in ['country']:
frequencies = dataset[column].value_counts()
print(f"Frequencies for {column}:\n{frequencies}\n")
Frequencies for country:
Afghanistan 63
Norway 63
Mozambique 63
Myanmar 63
Namibia 63
..
Guatemala 63
Guinea 63
Zimbabwe 63
Turkey 11
Czech Republic 11
Name: country, Length: 268, dtype: int64
# creating scatter plots of features related to consumptions
different_standards_features = [feature for feature in numerical_columns if 'std' in feature]
# Listing down unique features across various standards in the dataset
for feature in different_standards_features:
print(feature, numerical_columns[feature].unique())
control_of_corruption_std [ nan 0.34050697 0.324013 ... 0.15330967 0.15605473 0.13374464] goverment_effectiveness_std [ nan 0.18761755 0.30231553 ... 0.24523363 0.20191254 0.18246593] political_stability_std [ nan 0.4748072 0.4352209 0.45390606 0.43629751 0.34964156 0.30459777 0.30333257 0.28942022 0.30810529 0.30981806 0.29300761 0.28206983 0.27388433 0.24388154 0.24574125 0.20889461 0.21560416 0.219708 0.22803602 0.22710179 0.24134798 0.24531512 0.24781153 0.42171598 0.38143334 0.39620549 0.39288157 0.33050954 0.3160013 0.30034962 0.273783 0.26269111 0.26359117 0.2647393 0.26047391 0.25086629 0.22198433 0.22801979 0.19638619 0.20298342 0.20779568 0.21594997 0.21523696 0.23301464 0.23612024 0.23941578 0.37391231 0.34566841 0.34250641 0.31601527 0.29205847 0.28685582 0.27811614 0.25285429 0.24429454 0.24604777 0.24605513 0.2435471 0.24424958 0.22680736 0.23150824 0.20011306 0.20681481 0.21260111 0.22127773 0.22028312 0.23851493 0.24132012 0.42519823 0.44368258 0.42395952 0.38278705 0.38323367 0.37083346 0.39340416 0.41133508 0.34492502 0.39368939 0.29147407 0.31012049 0.32092151 0.35220391 0.30930543 0.31755972 0.34252042 0.32860956 0.61303073 0.57573354 0.5952037 0.51814151 0.36473665 0.38434094 0.3764399 0.34842056 0.35001713 0.34126225 0.35239929 0.36077529 0.3173849 0.34742737 0.28007093 0.29093313 0.3153305 0.29657856 0.28803214 0.2856389 0.28867644 0.28687262 0.26761767 0.2710602 0.25199843 0.23303711 0.21053849 0.21616668 0.21814875 0.21937421 0.21829574 0.22347608 0.21501607 0.33064833 0.33286983 0.33175615 0.32360187 0.32008725 0.3066951 0.28274652 0.29725292 0.23805772 0.24594994 0.24802686 0.2581048 0.25412327 0.27849552 0.27712518 0.280121 0.31338027 0.28604171 0.28284657 0.2789576 0.2701239 0.24576741 0.2392498 0.24130794 0.24153912 0.23933095 0.23852235 0.21966553 0.22578736 0.19224741 0.19816442 0.2049565 0.21172263 0.21109025 0.22715022 0.22972222 0.23361281 0.25491926 0.24469157 0.36922431 0.36291722 0.33110604 0.33214673 0.31109184 0.30182099 0.29413733 0.26799193 0.27936524 0.23565063 0.24116345 0.24432918 0.25488386 0.2518363 0.27138987 0.27081415 0.27579004 0.2648508 0.25644869 0.25778809 0.20627439 0.20702998 0.20948239 0.21417129 0.21078306 0.4094311 0.30158466 0.29303238 0.31146678 0.31688541 0.29089096 0.29207346 0.27218479 0.26757622 0.26071763 0.24220166 0.24786642 0.21057026 0.21140938 0.21511243 0.22351009 0.22344395 0.23854543 0.2246238 0.22422254 0.23850638 0.30092695 0.24067757 0.32248533 0.30065388 0.29880321 0.28377882 0.27378613 0.26718533 0.24639824 0.25246668 0.21239938 0.22120954 0.22024554 0.23307268 0.23217934 0.24750973 0.27938786 0.22781482 0.23446649 0.20155461 0.20650044 0.21112111 0.21905675 0.21890731 0.28083846 0.28506348 0.25444612 0.26130271 0.21412544 0.22421709 0.31940094 0.26021859 0.25422666 0.25637811 0.26111519 0.25432348 0.2549746 0.23676789 0.24234171 0.21339691 0.21596204 0.22137748 0.23054464 0.22863907 0.24747567 0.2511619 0.41270143 0.37369168 0.34848073 0.34987396 0.33598191 0.301696 0.32470283 0.25993899 0.274313 0.27612472 0.28873286 0.2805832 0.31392911 0.31560937 0.32261878 0.22159612 0.22599231 0.23575321 0.23855036 0.25413197 0.25367594 0.38418293 0.37775585 0.33732522 0.31590858 0.29767811 0.29303577 0.28320315 0.27516082 0.27641359 0.23129664 0.23834883 0.21161032 0.21596734 0.22453751 0.22301143 0.30827102 0.21159795 0.22021282 0.21954472 0.34535113 0.35239625 0.35511994 0.32408279 0.28279042 0.23522983 0.24028644 0.21574846 0.20729105 0.61605984 0.56331897 0.5865168 0.31767115 0.31035107 0.31543368 0.30069813 0.22750434 0.23382279 0.25091901 0.20315695 0.29692462 0.31975964 0.25838256 0.26442173 0.27373833 0.29541179 0.28139332 0.26825079 0.26092178 0.26077685 0.25012425 0.2202124 0.22541168 0.23433635 0.23305322 0.33503246 0.32185015 0.25285798 0.24964677 0.2598241 0.25527036 0.278447 0.24932286 0.2493356 0.23826431 0.20558971 0.21667859 0.21929044 0.22001571 0.22282338 0.25724959 0.24824175 0.25913855 0.25533524 0.24842845 0.25208917 0.21673635 0.2158439 0.23223656 0.23450227 0.24199788 0.2189825 0.22619511 0.23028909 0.23981237 0.33061889 0.34074208 0.25632203 0.25662312 0.25485447 0.25432298 0.25727281 0.24857452 0.31808802 0.33536226 0.33652997 0.29732421 0.2863625 0.28704148 0.28352326 0.20469727 0.21174674 0.22587076 0.22490767 0.24445687 0.57297325 0.51557553 0.38941681 0.39309409 0.36971691 0.37238046 0.3587923 0.35336846 0.34713376 0.37334967 0.35292655 0.33482686 0.33296463 0.27125251 0.27162254 0.27852079 0.26927027 0.32501134 0.29617736 0.29935875 0.33216527 0.2662203 0.27790329 0.22736059 0.23850845 0.24296547 0.25318784 0.25010827 0.27416778 0.27140418 0.2756381 0.20764013 0.36495757 0.43122065 0.40466738 0.35024962 0.30580261 0.31325248 0.30338103 0.2884939 0.25766206 0.2495524 0.25141993 0.25938681 0.26575315 0.2789005 0.24528623 0.26037687 0.21727818 0.22237226 0.23856381 0.24709143 0.23905627 0.21883735 0.25635511 0.2533136 0.35292608 0.33928439 0.30628979 0.29855192 0.29035416 0.25236255 0.22433582 0.22917284 0.19573942 0.20172459 0.20956327 0.31597561 0.28244427 0.24325204 0.55676848 0.26156551 0.29082862 0.30512604 0.29294446 0.31788752 0.30454284 0.28560653 0.29156631 0.26292467 0.22045697 0.22943297 0.22910525 0.66076291 0.66481692 0.37632996 0.34571326 0.35369718 0.33061823 0.26252958 0.2402494 0.2479616 0.20795862 0.2169023 0.22246191 0.22273397 0.22512799 0.22663754 0.2210357 0.30950347 0.30273956 0.30383134 0.25638649 0.21410161 0.21498749 0.21831912 0.21932752 0.31034491 0.27880758 0.29344639 0.23686045 0.24504805 0.25440973 0.27262527 0.26320952 0.36136803 0.232476 0.2376782 0.20002316 0.20629324 0.32668373 0.3284497 0.34079763 0.31718811 0.32848206 0.34067583 0.30829489 0.22986214 0.23639268 0.20071347 0.26734936 0.26254615 0.27963638 0.26161137 0.26144174 0.58625197 0.61932492 0.6418249 0.65203184 0.55681706 0.48492041 0.60647321 0.63564318 0.59897769 0.51164603 0.54859662 0.57059056 0.58095372 0.5223909 0.3992306 0.40737543 0.28799811 0.28969649 0.27392066 0.27393797 0.49002594 0.4638792 0.44450817 0.31915247 0.30500919 0.321823 0.32633609 0.28017587 0.27549517 0.27779677 0.28359938 0.30609456 0.46935394 0.45348999 0.47369942 0.46379399 0.46901482 0.42006519 0.44471785 0.41451019 0.38210314 0.40114301 0.39291236 0.38078505 0.41120696 0.39208743 0.36414743 0.35640842 0.30877671 0.2845954 0.43947971 0.3112961 0.24553926 0.2150799 0.21568069 0.36453584 0.21806972 0.21823958 0.22046137 0.28561649 0.2664763 0.26872444 0.60024041 0.5905835 0.5613001 0.54708678 0.22394055 0.2188132 0.32190937 0.31868052 0.31403664 0.29173616 0.28588563 0.25863707 0.26864409 0.24694167 0.21090248 0.21150878 0.21345171 0.21802896 0.2143566 0.64384705 0.39456308 0.37375858 0.23093073 0.24458161 0.21464805 0.21454914 0.34470671 0.35602745 0.26501671 0.25868416 0.26323473 0.2379147 0.28562278 0.21017772 0.21093757 0.21405537 0.30592406] rule_of_law_std [ nan 0.3505094 0.32727668 ... 0.147108 0.1469944 0.14231273] regulatory_quality_std [ nan 0.38636038 0.44084185 ... 0.23550989 0.17099454 0.17649963] voice_and_accountability_std [ nan 0.26145712 0.25608963 ... 0.12136008 0.11163896 0.11021758]
CountryWiseData.columns
Index(['agricultural_land%', 'forest_land%', 'land_area', 'avg_precipitation',
'trade_in_services%', 'control_of_corruption_estimate',
'control_of_corruption_std', 'access_to_electricity%',
'renewvable_energy_consumption%', 'electric_power_consumption',
'CO2_emisions', 'other_greenhouse_emisions', 'population_density',
'inflation_annual%', 'real_interest_rate', 'risk_premium_on_lending',
'research_and_development_expenditure%', 'central_goverment_debt%',
'tax_revenue%', 'expense%', 'goverment_effectiveness_estimate',
'goverment_effectiveness_std', 'human_capital_index', 'doing_business',
'time_to_get_operation_license', 'statistical_performance_indicators',
'individuals_using_internet%', 'logistic_performance_index',
'military_expenditure%', 'GDP_current_US',
'political_stability_estimate', 'political_stability_std',
'rule_of_law_estimate', 'rule_of_law_std',
'regulatory_quality_estimate', 'regulatory_quality_std',
'government_expenditure_on_education%',
'government_health_expenditure%',
'multidimensional_poverty_headcount_ratio%', 'gini_index', 'birth_rate',
'death_rate', 'life_expectancy_at_birth', 'population',
'rural_population', 'voice_and_accountability_estimate',
'voice_and_accountability_std', 'intentional_homicides'],
dtype='object')
CountryWiseData.head(10)
| agricultural_land% | forest_land% | land_area | avg_precipitation | trade_in_services% | control_of_corruption_estimate | control_of_corruption_std | access_to_electricity% | renewvable_energy_consumption% | electric_power_consumption | CO2_emisions | other_greenhouse_emisions | population_density | inflation_annual% | real_interest_rate | risk_premium_on_lending | research_and_development_expenditure% | central_goverment_debt% | tax_revenue% | expense% | goverment_effectiveness_estimate | goverment_effectiveness_std | human_capital_index | doing_business | time_to_get_operation_license | statistical_performance_indicators | individuals_using_internet% | logistic_performance_index | military_expenditure% | GDP_current_US | political_stability_estimate | political_stability_std | rule_of_law_estimate | rule_of_law_std | regulatory_quality_estimate | regulatory_quality_std | government_expenditure_on_education% | government_health_expenditure% | multidimensional_poverty_headcount_ratio% | gini_index | birth_rate | death_rate | life_expectancy_at_birth | population | rural_population | voice_and_accountability_estimate | voice_and_accountability_std | intentional_homicides | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| country | ||||||||||||||||||||||||||||||||||||||||||||||||
| Afghanistan | 58.18 | 1.85 | 652230.00 | 327.00 | 12.65 | -1.43 | 0.22 | 55.82 | 27.04 | NaN | 4821.06 | 20799.12 | 27.34 | 6.12 | 10.00 | NaN | NaN | NaN | 7.80 | 41.62 | -1.50 | 0.26 | 0.39 | 173.00 | 13.75 | 44.85 | 4.76 | 1.97 | 1.65 | 7717726754.86 | -2.48 | 0.30 | -1.72 | 0.21 | -1.50 | 0.23 | 2.69 | 0.54 | 50.55 | NaN | 47.67 | 18.23 | 47.92 | 18410104.44 | 14513617.44 | -1.30 | 0.16 | 6.02 |
| Africa Eastern and Southern | 43.69 | 33.45 | 14632485.85 | NaN | 9.83 | NaN | NaN | 30.99 | 63.33 | 716.08 | 452640.56 | 1116224.04 | 23.42 | 10.38 | NaN | NaN | 0.62 | NaN | 17.78 | 24.07 | NaN | NaN | NaN | NaN | 13.24 | NaN | 6.43 | 2.48 | 2.52 | 366730373780.84 | NaN | NaN | NaN | NaN | NaN | NaN | 4.39 | 2.55 | NaN | NaN | 42.79 | 14.90 | 51.89 | 351919799.95 | 251054684.00 | NaN | NaN | 11.97 |
| Africa Western and Central | 35.83 | 21.19 | 9045959.88 | NaN | 9.66 | NaN | NaN | 41.27 | 81.26 | 127.28 | 153913.71 | 643892.33 | 25.87 | 4.64 | NaN | NaN | 0.15 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | 32.29 | NaN | 9.42 | 2.41 | 1.79 | 258364273935.03 | NaN | NaN | NaN | NaN | NaN | NaN | 2.82 | 0.80 | NaN | NaN | 44.23 | 17.59 | 48.33 | 239756352.51 | 153870196.38 | NaN | NaN | 9.83 |
| Albania | 42.15 | 28.50 | 27400.00 | 1485.00 | 23.31 | -0.68 | 0.17 | 99.80 | 40.24 | 1167.18 | 3881.95 | 8614.21 | 99.59 | 15.06 | 2.44 | 6.20 | 0.12 | 66.81 | 16.43 | 24.20 | -0.31 | 0.23 | 0.61 | 82.00 | 14.77 | 71.14 | 25.38 | 2.48 | 2.79 | 7480345027.69 | -0.09 | 0.27 | -0.55 | 0.17 | 0.02 | 0.21 | 3.37 | 2.62 | 47.60 | 31.01 | 22.85 | 7.73 | 71.50 | 2713063.16 | 1574154.24 | 0.03 | 0.15 | 6.40 |
| Algeria | 17.45 | 0.74 | 2381740.10 | 89.00 | 6.83 | -0.64 | 0.18 | 99.06 | 0.32 | 599.61 | 105761.24 | 193574.39 | 10.72 | 8.60 | 1.90 | 5.82 | 0.26 | NaN | NaN | NaN | -0.56 | 0.21 | 0.53 | 157.00 | 19.30 | 48.79 | 16.35 | 2.46 | 3.15 | 71794434279.38 | -1.23 | 0.26 | -0.84 | 0.17 | -0.96 | 0.23 | 6.45 | 3.52 | NaN | 34.37 | 33.52 | 10.73 | 61.42 | 25915130.46 | 10791909.30 | -0.98 | 0.14 | 1.17 |
| American Samoa | 19.98 | 87.94 | 200.00 | NaN | NaN | 0.88 | 0.41 | NaN | 0.11 | NaN | NaN | NaN | 213.19 | NaN | NaN | NaN | 0.32 | NaN | NaN | NaN | 0.46 | 0.44 | NaN | NaN | NaN | NaN | 0.00 | NaN | NaN | 601100000.00 | 1.01 | 0.36 | 1.18 | 0.44 | 0.26 | 0.43 | 14.72 | NaN | NaN | NaN | 17.42 | 4.72 | NaN | 42344.19 | 7513.62 | 0.95 | 0.49 | 7.18 |
| Andorra | 48.33 | 34.04 | 470.00 | NaN | 83.14 | 1.27 | 0.42 | 100.00 | 16.76 | NaN | 484.89 | 565.57 | 105.76 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | 1.59 | 0.41 | NaN | NaN | NaN | NaN | 44.98 | NaN | NaN | 1659435438.30 | 1.36 | 0.37 | 1.34 | 0.40 | 1.30 | 0.42 | 2.75 | 4.49 | NaN | NaN | 10.77 | 3.53 | NaN | 50010.68 | 5107.17 | 1.32 | 0.25 | 0.60 |
| Angola | 38.47 | 59.54 | 1246700.00 | 1010.00 | 22.19 | -1.26 | 0.18 | 35.73 | 62.64 | 99.22 | 18463.87 | 66559.93 | 11.50 | 358.38 | -9.02 | 11.00 | 0.03 | NaN | 16.95 | 21.98 | -1.09 | 0.21 | 0.36 | 177.00 | 29.40 | 50.15 | 8.03 | 2.28 | 6.06 | 39726806446.37 | -0.77 | 0.26 | -1.27 | 0.17 | -1.07 | 0.22 | 3.12 | 1.52 | 54.00 | 48.67 | 48.39 | 18.85 | 46.68 | 14847769.44 | 7520616.13 | -1.15 | 0.14 | 4.44 |
| Antigua and Barbuda | 20.04 | 20.72 | 440.00 | 1030.00 | 73.38 | 0.83 | 0.37 | 98.15 | 0.14 | NaN | 372.44 | 774.68 | 162.56 | 2.00 | 7.65 | 4.74 | NaN | NaN | NaN | NaN | 0.34 | 0.40 | 0.59 | 113.00 | 2.90 | NaN | 32.55 | 2.90 | NaN | 809735374.75 | 0.87 | 0.34 | 0.74 | 0.34 | 0.56 | 0.38 | 2.84 | 2.89 | NaN | NaN | 20.73 | 6.62 | 73.19 | 71967.87 | 49358.71 | 0.55 | 0.24 | 9.30 |
| Arab World | 35.02 | 3.36 | 13534656.22 | NaN | 16.18 | NaN | NaN | 85.23 | 5.85 | 1201.84 | 1246017.27 | 1957551.81 | 17.92 | 5.69 | NaN | NaN | 0.64 | NaN | 5.33 | 23.23 | NaN | NaN | NaN | NaN | 15.28 | NaN | 19.81 | 2.69 | 7.02 | 1108382062269.78 | NaN | NaN | NaN | NaN | NaN | NaN | 3.89 | 2.47 | NaN | NaN | 35.87 | 10.31 | 61.15 | 246121661.13 | 118509632.79 | NaN | NaN | 4.23 |
# Histograms for various numerical parameters
data=CountryWiseData.head(90)
for feature in numerical_columns.columns:
# sns.histplot(df[column_name], kde=True, color='blue')
# # Add labels and title
# plt.xlabel(column_name)
# plt.ylabel('Frequency')
# plt.title(f'Distribution of {column_name}')
plt.figure(figsize=(3,3))
sns.histplot(CountryWiseData[feature],kde=True, color='blue')
plt.xlabel(feature)
plt.ylabel('Count')
# plt.yticks(data.index.tolist())
plt.title(f"Graph for {feature}")
plt.show()
Majority of the indicators show a right skewed distribution which shows the disparity of resource distribution between rich countries and poor countries.
for feature in numerical_columns.columns:
plt.figure(figsize=(10, 20))
data=CountryWiseData.copy()
plt.scatter(data[feature],data.index.tolist())
plt.xlabel(feature)
plt.ylabel('Countries')
plt.title(f"Graph for {feature}")
# plt.show()
C:\Users\Bilal\AppData\Local\Temp\ipykernel_18668\588317347.py:2: RuntimeWarning: More than 20 figures have been opened. Figures created through the pyplot interface (`matplotlib.pyplot.figure`) are retained until explicitly closed and may consume too much memory. (To control this warning, see the rcParam `figure.max_open_warning`). plt.figure(figsize=(10, 20))
Not many conclusions can be drawn from this data since this type of represendtion is not Suitable for the given data.
data=CountryWiseData.head(90)
for feature in numerical_columns.columns:
plt.figure(figsize=(10, 20))
plt.barh(data.index.tolist(),data[feature],height=0.2)
plt.xlabel(feature)
plt.ylabel('Countries')
plt.yticks(data.index.tolist())
plt.title(f"Graph for {feature}")
plt.show()
for feature in numerical_columns.columns:
data=CountryWiseData.copy()
if 0 in data[feature].unique():
pass
else:
plt.figure(figsize=(4, 3))
data[feature]=np.log(data[feature])
data.boxplot(column=feature)
plt.ylabel(feature)
plt.title(feature)
plt.show()
C:\ProgramData\Anaconda3\lib\site-packages\pandas\core\arraylike.py:397: RuntimeWarning: invalid value encountered in log result = getattr(ufunc, method)(*inputs, **kwargs)
C:\ProgramData\Anaconda3\lib\site-packages\pandas\core\arraylike.py:397: RuntimeWarning: invalid value encountered in log result = getattr(ufunc, method)(*inputs, **kwargs)
C:\ProgramData\Anaconda3\lib\site-packages\pandas\core\arraylike.py:397: RuntimeWarning: invalid value encountered in log result = getattr(ufunc, method)(*inputs, **kwargs)
C:\ProgramData\Anaconda3\lib\site-packages\pandas\core\arraylike.py:397: RuntimeWarning: invalid value encountered in log result = getattr(ufunc, method)(*inputs, **kwargs)
C:\ProgramData\Anaconda3\lib\site-packages\pandas\core\arraylike.py:397: RuntimeWarning: invalid value encountered in log result = getattr(ufunc, method)(*inputs, **kwargs)
C:\ProgramData\Anaconda3\lib\site-packages\pandas\core\arraylike.py:397: RuntimeWarning: invalid value encountered in log result = getattr(ufunc, method)(*inputs, **kwargs)
C:\ProgramData\Anaconda3\lib\site-packages\pandas\core\arraylike.py:397: RuntimeWarning: invalid value encountered in log result = getattr(ufunc, method)(*inputs, **kwargs)
C:\ProgramData\Anaconda3\lib\site-packages\pandas\core\arraylike.py:397: RuntimeWarning: invalid value encountered in log result = getattr(ufunc, method)(*inputs, **kwargs)
Using Box plots we can see that some data points are evently distributed while some data points have a lot of outliers. As a general trend, parameters which show a good quality of life have quite a few outliers showing disparity between first world countries and the rest of the world
## Plotting data to understand the relationship of Agricultural Land and forest land
CountryWiseData.head(5)
| agricultural_land% | forest_land% | land_area | avg_precipitation | trade_in_services% | control_of_corruption_estimate | control_of_corruption_std | access_to_electricity% | renewvable_energy_consumption% | electric_power_consumption | CO2_emisions | other_greenhouse_emisions | population_density | inflation_annual% | real_interest_rate | risk_premium_on_lending | research_and_development_expenditure% | central_goverment_debt% | tax_revenue% | expense% | goverment_effectiveness_estimate | goverment_effectiveness_std | human_capital_index | doing_business | time_to_get_operation_license | statistical_performance_indicators | individuals_using_internet% | logistic_performance_index | military_expenditure% | GDP_current_US | political_stability_estimate | political_stability_std | rule_of_law_estimate | rule_of_law_std | regulatory_quality_estimate | regulatory_quality_std | government_expenditure_on_education% | government_health_expenditure% | multidimensional_poverty_headcount_ratio% | gini_index | birth_rate | death_rate | life_expectancy_at_birth | population | rural_population | voice_and_accountability_estimate | voice_and_accountability_std | intentional_homicides | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| country | ||||||||||||||||||||||||||||||||||||||||||||||||
| Afghanistan | 58.18 | 1.85 | 652230.00 | 327.00 | 12.65 | -1.43 | 0.22 | 55.82 | 27.04 | NaN | 4821.06 | 20799.12 | 27.34 | 6.12 | 10.00 | NaN | NaN | NaN | 7.80 | 41.62 | -1.50 | 0.26 | 0.39 | 173.00 | 13.75 | 44.85 | 4.76 | 1.97 | 1.65 | 7717726754.86 | -2.48 | 0.30 | -1.72 | 0.21 | -1.50 | 0.23 | 2.69 | 0.54 | 50.55 | NaN | 47.67 | 18.23 | 47.92 | 18410104.44 | 14513617.44 | -1.30 | 0.16 | 6.02 |
| Africa Eastern and Southern | 43.69 | 33.45 | 14632485.85 | NaN | 9.83 | NaN | NaN | 30.99 | 63.33 | 716.08 | 452640.56 | 1116224.04 | 23.42 | 10.38 | NaN | NaN | 0.62 | NaN | 17.78 | 24.07 | NaN | NaN | NaN | NaN | 13.24 | NaN | 6.43 | 2.48 | 2.52 | 366730373780.84 | NaN | NaN | NaN | NaN | NaN | NaN | 4.39 | 2.55 | NaN | NaN | 42.79 | 14.90 | 51.89 | 351919799.95 | 251054684.00 | NaN | NaN | 11.97 |
| Africa Western and Central | 35.83 | 21.19 | 9045959.88 | NaN | 9.66 | NaN | NaN | 41.27 | 81.26 | 127.28 | 153913.71 | 643892.33 | 25.87 | 4.64 | NaN | NaN | 0.15 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | 32.29 | NaN | 9.42 | 2.41 | 1.79 | 258364273935.03 | NaN | NaN | NaN | NaN | NaN | NaN | 2.82 | 0.80 | NaN | NaN | 44.23 | 17.59 | 48.33 | 239756352.51 | 153870196.38 | NaN | NaN | 9.83 |
| Albania | 42.15 | 28.50 | 27400.00 | 1485.00 | 23.31 | -0.68 | 0.17 | 99.80 | 40.24 | 1167.18 | 3881.95 | 8614.21 | 99.59 | 15.06 | 2.44 | 6.20 | 0.12 | 66.81 | 16.43 | 24.20 | -0.31 | 0.23 | 0.61 | 82.00 | 14.77 | 71.14 | 25.38 | 2.48 | 2.79 | 7480345027.69 | -0.09 | 0.27 | -0.55 | 0.17 | 0.02 | 0.21 | 3.37 | 2.62 | 47.60 | 31.01 | 22.85 | 7.73 | 71.50 | 2713063.16 | 1574154.24 | 0.03 | 0.15 | 6.40 |
| Algeria | 17.45 | 0.74 | 2381740.10 | 89.00 | 6.83 | -0.64 | 0.18 | 99.06 | 0.32 | 599.61 | 105761.24 | 193574.39 | 10.72 | 8.60 | 1.90 | 5.82 | 0.26 | NaN | NaN | NaN | -0.56 | 0.21 | 0.53 | 157.00 | 19.30 | 48.79 | 16.35 | 2.46 | 3.15 | 71794434279.38 | -1.23 | 0.26 | -0.84 | 0.17 | -0.96 | 0.23 | 6.45 | 3.52 | NaN | 34.37 | 33.52 | 10.73 | 61.42 | 25915130.46 | 10791909.30 | -0.98 | 0.14 | 1.17 |
plt.figure(figsize=(4, 3))
x=CountryWiseData['population']
y=CountryWiseData['rural_population']
plt.scatter(x,y,s=8)
plt.ylabel('Population')
plt.xlabel('Rural Population')
plt.title('Relationship between Total Population and Rural Population')
plt.show()
plt.figure(figsize=(4, 3))
x=CountryWiseData['individuals_using_internet%']
y=CountryWiseData['human_capital_index']
plt.scatter(x,y,s=8)
plt.xlabel('Percentage of People accessing internet')
plt.ylabel('Human Capital Index')
plt.title('Relationship between Human Capital Index and Internet Accessiblity')
plt.show()
plt.figure(figsize=(4, 3))
x=CountryWiseData['control_of_corruption_std']
y=CountryWiseData['voice_and_accountability_std']
plt.scatter(x,y,s=8)
plt.xlabel('Control of Corruption')
plt.ylabel('Voice and Accountablity')
plt.title('Relationship between Corruption Control and Accountablity')
plt.show()
In this part I have tried to explore the relation between the Human Capital Index (HCI) with different parameters available in the dataset, a general trend shown with parameters which correlate to a higher quality of life such as access to electricity and internet, longers life expectency have a directly propotional relation with HCI. While other parameters such as Percentage of Forest Land, and Land Area have low corrleation with HCI
df = CountryWiseData.copy()
for column_name in CountryWiseData.columns:
df[column_name].replace('None', pd.NA, inplace=True)
# Convert the column to a numeric type (if it's not already)
df[column_name] = pd.to_numeric(df[column_name], errors='coerce')
# Calculate the median
median_value = df[column_name].median()
# Impute NaN values with the median
df[column_name].fillna(median_value, inplace=True)
dataset=CountryWiseData.copy()
features_with_na=[features for features in dataset.columns if dataset[features].isnull().sum()>1]
for feature in features_with_na:
print(feature, np.round(dataset[feature].isnull().mean(), 2), '\b% missing values')
print()
print()
print()
features_with_nan = [feature for feature in dataset.columns if dataset[feature].isna().sum() > 1]
for feature in features_with_nan:
print(f"{feature}: {np.round(dataset[feature].isna().mean() * 100, 2)}% NaN values")
agricultural_land% 0.04 % missing values forest_land% 0.02 % missing values land_area 0.02 % missing values avg_precipitation 0.32 % missing values trade_in_services% 0.08 % missing values control_of_corruption_estimate 0.24 % missing values control_of_corruption_std 0.24 % missing values access_to_electricity% 0.01 % missing values renewvable_energy_consumption% 0.03 % missing values electric_power_consumption 0.31 % missing values CO2_emisions 0.11 % missing values other_greenhouse_emisions 0.11 % missing values population_density 0.02 % missing values inflation_annual% 0.1 % missing values real_interest_rate 0.45 % missing values risk_premium_on_lending 0.68 % missing values research_and_development_expenditure% 0.3 % missing values central_goverment_debt% 0.5 % missing values tax_revenue% 0.27 % missing values expense% 0.31 % missing values goverment_effectiveness_estimate 0.24 % missing values goverment_effectiveness_std 0.24 % missing values human_capital_index 0.35 % missing values doing_business 0.29 % missing values time_to_get_operation_license 0.26 % missing values statistical_performance_indicators 0.35 % missing values individuals_using_internet% 0.03 % missing values logistic_performance_index 0.19 % missing values military_expenditure% 0.21 % missing values GDP_current_US 0.02 % missing values political_stability_estimate 0.24 % missing values political_stability_std 0.24 % missing values rule_of_law_estimate 0.24 % missing values rule_of_law_std 0.24 % missing values regulatory_quality_estimate 0.24 % missing values regulatory_quality_std 0.24 % missing values government_expenditure_on_education% 0.07 % missing values government_health_expenditure% 0.11 % missing values multidimensional_poverty_headcount_ratio% 0.75 % missing values gini_index 0.38 % missing values birth_rate 0.01 % missing values death_rate 0.01 % missing values life_expectancy_at_birth 0.03 % missing values population 0.01 % missing values rural_population 0.02 % missing values voice_and_accountability_estimate 0.24 % missing values voice_and_accountability_std 0.24 % missing values intentional_homicides 0.09 % missing values agricultural_land%: 4.1% NaN values forest_land%: 2.24% NaN values land_area: 1.87% NaN values avg_precipitation: 32.09% NaN values trade_in_services%: 7.84% NaN values control_of_corruption_estimate: 24.25% NaN values control_of_corruption_std: 24.25% NaN values access_to_electricity%: 1.49% NaN values renewvable_energy_consumption%: 2.99% NaN values electric_power_consumption: 30.6% NaN values CO2_emisions: 10.82% NaN values other_greenhouse_emisions: 10.82% NaN values population_density: 1.87% NaN values inflation_annual%: 10.45% NaN values real_interest_rate: 44.78% NaN values risk_premium_on_lending: 67.54% NaN values research_and_development_expenditure%: 29.85% NaN values central_goverment_debt%: 50.37% NaN values tax_revenue%: 26.87% NaN values expense%: 31.34% NaN values goverment_effectiveness_estimate: 24.25% NaN values goverment_effectiveness_std: 24.25% NaN values human_capital_index: 35.07% NaN values doing_business: 29.48% NaN values time_to_get_operation_license: 25.75% NaN values statistical_performance_indicators: 35.07% NaN values individuals_using_internet%: 2.61% NaN values logistic_performance_index: 19.03% NaN values military_expenditure%: 20.9% NaN values GDP_current_US: 2.24% NaN values political_stability_estimate: 23.51% NaN values political_stability_std: 23.51% NaN values rule_of_law_estimate: 23.51% NaN values rule_of_law_std: 23.51% NaN values regulatory_quality_estimate: 24.25% NaN values regulatory_quality_std: 24.25% NaN values government_expenditure_on_education%: 6.72% NaN values government_health_expenditure%: 11.19% NaN values multidimensional_poverty_headcount_ratio%: 75.0% NaN values gini_index: 37.69% NaN values birth_rate: 1.12% NaN values death_rate: 1.12% NaN values life_expectancy_at_birth: 2.99% NaN values population: 1.12% NaN values rural_population: 1.87% NaN values voice_and_accountability_estimate: 23.51% NaN values voice_and_accountability_std: 23.51% NaN values intentional_homicides: 8.96% NaN values
As evident from the histograms drawn in Question 01, majority of the data points are right skewed i.e. majority of the data points are concnetrated towards the left.
In such a situtation the median imputation for filling in the missing values is a viable technique which will also preserve the skewed nature of the data.
df=CountryWiseData.copy()
numeric_columns = df.select_dtypes(include=['number']).columns
numeric_columns = [col for col in numeric_columns if col != 'country' and col !='date']
for column in numeric_columns:
median_value = df[column].median()
df[column].fillna(median_value, inplace=True)
dataset=df
features_with_na=[features for features in dataset.columns if dataset[features].isnull().sum()>1]
for feature in features_with_na:
print(feature, np.round(dataset[feature].isnull().mean(), 2), '\b% missing values')
print()
print()
print()
features_with_nan = [feature for feature in dataset.columns if dataset[feature].isna().sum() > 1]
for feature in features_with_nan:
print(f"{feature}: {np.round(dataset[feature].isna().mean() * 100, 2)}% NaN values")
# Putting new data in CountryWiseData DataFrame
CountryWiseDataNew=df.groupby('country').mean()
for feature in df.columns:
data=df.copy()
if 0 in data[feature].unique():
pass
else:
plt.figure(figsize=(4, 3))
data.boxplot(column=feature)
plt.ylabel(feature)
plt.title(feature)
plt.show()
for feature in df.columns:
data=df.copy()
if 0 in data[feature].unique():
pass
else:
# Generate some sample data (replace this with your actual data)
data1 = data[feature]
data2 = np.log(data[feature])
# Create a figure and a set of subplots
fig, axs = plt.subplots(1, 2, figsize=(10, 5))
# Create the first box plot
axs[0].boxplot(data1)
axs[0].set_title(f'{feature} boxplot')
# Create the second box plot
axs[1].boxplot(data2)
axs[1].set_title(f'{feature} boxplot with logarithmic transformation')
# Show the plots
plt.tight_layout()
plt.show()
C:\ProgramData\Anaconda3\lib\site-packages\pandas\core\arraylike.py:397: RuntimeWarning: invalid value encountered in log result = getattr(ufunc, method)(*inputs, **kwargs)
C:\ProgramData\Anaconda3\lib\site-packages\pandas\core\arraylike.py:397: RuntimeWarning: invalid value encountered in log result = getattr(ufunc, method)(*inputs, **kwargs)
C:\ProgramData\Anaconda3\lib\site-packages\pandas\core\arraylike.py:397: RuntimeWarning: invalid value encountered in log result = getattr(ufunc, method)(*inputs, **kwargs)
C:\ProgramData\Anaconda3\lib\site-packages\pandas\core\arraylike.py:397: RuntimeWarning: invalid value encountered in log result = getattr(ufunc, method)(*inputs, **kwargs)
C:\ProgramData\Anaconda3\lib\site-packages\pandas\core\arraylike.py:397: RuntimeWarning: invalid value encountered in log result = getattr(ufunc, method)(*inputs, **kwargs)
C:\ProgramData\Anaconda3\lib\site-packages\pandas\core\arraylike.py:397: RuntimeWarning: invalid value encountered in log result = getattr(ufunc, method)(*inputs, **kwargs)
C:\ProgramData\Anaconda3\lib\site-packages\pandas\core\arraylike.py:397: RuntimeWarning: invalid value encountered in log result = getattr(ufunc, method)(*inputs, **kwargs)
C:\ProgramData\Anaconda3\lib\site-packages\pandas\core\arraylike.py:397: RuntimeWarning: invalid value encountered in log result = getattr(ufunc, method)(*inputs, **kwargs)
As we can see on comparative analysis the parameters which were previously not very understandable become much more correlated once we apply logarithmic transformation. One more key observation is that parameters which were previously understandable do get messed up after logarithmic transformation. Thus log transformation should be applied with caution and with thorough analysis.
# Histograms for various numerical parameters
data=CountryWiseDataNew.copy()
for feature in numerical_columns.columns:
plt.figure(figsize=(3,3))
sns.histplot(CountryWiseData[feature],kde=True, color='blue')
plt.xlabel(feature)
plt.ylabel('Count')
plt.title(f"Graph for {feature}")
plt.show()
Given the various histograms we can see that the majority of the parameters are right skewed while some parameters like "Access to Electricity" are left skewed.
for feature in numerical_columns.columns:
print(f"Kurtosis value for {feature} is {np.round(stats.kurtosis(CountryWiseDataNew[feature]),2)}.")
Kurtosis value for agricultural_land% is -0.56. Kurtosis value for forest_land% is -0.12. Kurtosis value for land_area is 29.57. Kurtosis value for avg_precipitation is 0.9. Kurtosis value for trade_in_services% is 19.85. Kurtosis value for control_of_corruption_estimate is 0.38. Kurtosis value for control_of_corruption_std is 32.65. Kurtosis value for access_to_electricity% is -0.02. Kurtosis value for renewvable_energy_consumption% is -0.71. Kurtosis value for electric_power_consumption is 12.14. Kurtosis value for CO2_emisions is 29.73. Kurtosis value for other_greenhouse_emisions is 30.24. Kurtosis value for population_density is 99.17. Kurtosis value for inflation_annual% is 72.93. Kurtosis value for real_interest_rate is 13.61. Kurtosis value for risk_premium_on_lending is 45.4. Kurtosis value for research_and_development_expenditure% is 4.8. Kurtosis value for central_goverment_debt% is 78.48. Kurtosis value for tax_revenue% is 128.69. Kurtosis value for expense% is 150.04. Kurtosis value for goverment_effectiveness_estimate is 0.12. Kurtosis value for goverment_effectiveness_std is 29.4. Kurtosis value for human_capital_index is 0.1. Kurtosis value for doing_business is -0.44. Kurtosis value for time_to_get_operation_license is 9.19. Kurtosis value for statistical_performance_indicators is 0.59. Kurtosis value for individuals_using_internet% is -0.12. Kurtosis value for logistic_performance_index is 0.71. Kurtosis value for military_expenditure% is 33.28. Kurtosis value for GDP_current_US is 38.15. Kurtosis value for political_stability_estimate is 0.34. Kurtosis value for political_stability_std is 19.69. Kurtosis value for rule_of_law_estimate is -0.16. Kurtosis value for rule_of_law_std is 16.66. Kurtosis value for regulatory_quality_estimate is 0.03. Kurtosis value for regulatory_quality_std is 28.46. Kurtosis value for government_expenditure_on_education% is 9.9. Kurtosis value for government_health_expenditure% is 1.47. Kurtosis value for multidimensional_poverty_headcount_ratio% is 10.3. Kurtosis value for gini_index is 1.81. Kurtosis value for birth_rate is -1.16. Kurtosis value for death_rate is 0.93. Kurtosis value for life_expectancy_at_birth is -0.73. Kurtosis value for population is 27.87. Kurtosis value for rural_population is 25.94. Kurtosis value for voice_and_accountability_estimate is -0.44. Kurtosis value for voice_and_accountability_std is 16.18. Kurtosis value for intentional_homicides is 11.65.
We generally interpret kurtosis on the following basis:
A visual inspection of histogram of each parameter and is corresponding kurtosis verifies this obersvation that metrics with >0 kurtosis indeed have high peaks, metrics with<0 kurtosis have lower peaks, and metrics with kurtosis value closer to 0 are normally distributed.
data = CountryWiseData.copy()
for feature in numerical_columns.columns:
if feature != 'human_capital_index':
plt.figure(figsize=(4, 3))
x=CountryWiseData['human_capital_index']
y=CountryWiseData[feature]
plt.scatter(x,y,s=8)
plt.xlabel('human_capital_index')
plt.ylabel(feature)
plt.title(f'Relationship between human_capital_index and {feature}')
plt.show()
# Grouping data on the basis of countires:
GroupedData=first_data.copy()
GroupedData=GroupedData.groupby('country').mean()
GroupedData
| agricultural_land% | forest_land% | land_area | avg_precipitation | trade_in_services% | control_of_corruption_estimate | control_of_corruption_std | access_to_electricity% | renewvable_energy_consumption% | electric_power_consumption | CO2_emisions | other_greenhouse_emisions | population_density | inflation_annual% | real_interest_rate | risk_premium_on_lending | research_and_development_expenditure% | central_goverment_debt% | tax_revenue% | expense% | goverment_effectiveness_estimate | goverment_effectiveness_std | human_capital_index | doing_business | time_to_get_operation_license | statistical_performance_indicators | individuals_using_internet% | logistic_performance_index | military_expenditure% | GDP_current_US | political_stability_estimate | political_stability_std | rule_of_law_estimate | rule_of_law_std | regulatory_quality_estimate | regulatory_quality_std | government_expenditure_on_education% | government_health_expenditure% | multidimensional_poverty_headcount_ratio% | gini_index | birth_rate | death_rate | life_expectancy_at_birth | population | rural_population | voice_and_accountability_estimate | voice_and_accountability_std | intentional_homicides | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| country | ||||||||||||||||||||||||||||||||||||||||||||||||
| Afghanistan | 58.18 | 1.85 | 652230.00 | 327.00 | 12.65 | -1.43 | 0.22 | 55.82 | 27.04 | NaN | 4821.06 | 20799.12 | 27.34 | 6.12 | 10.00 | NaN | NaN | NaN | 7.80 | 41.62 | -1.50 | 0.26 | 0.39 | 173.00 | 13.75 | 44.85 | 4.76 | 1.97 | 1.65 | 7717726754.86 | -2.48 | 0.30 | -1.72 | 0.21 | -1.50 | 0.23 | 2.69 | 0.54 | 50.55 | NaN | 47.67 | 18.23 | 47.92 | 18410104.44 | 14513617.44 | -1.30 | 0.16 | 6.02 |
| Africa Eastern and Southern | 43.69 | 33.45 | 14632485.85 | NaN | 9.83 | NaN | NaN | 30.99 | 63.33 | 716.08 | 452640.56 | 1116224.04 | 23.42 | 10.38 | NaN | NaN | 0.62 | NaN | 17.78 | 24.07 | NaN | NaN | NaN | NaN | 13.24 | NaN | 6.43 | 2.48 | 2.52 | 366730373780.84 | NaN | NaN | NaN | NaN | NaN | NaN | 4.39 | 2.55 | NaN | NaN | 42.79 | 14.90 | 51.89 | 351919799.95 | 251054684.00 | NaN | NaN | 11.97 |
| Africa Western and Central | 35.83 | 21.19 | 9045959.88 | NaN | 9.66 | NaN | NaN | 41.27 | 81.26 | 127.28 | 153913.71 | 643892.33 | 25.87 | 4.64 | NaN | NaN | 0.15 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | 32.29 | NaN | 9.42 | 2.41 | 1.79 | 258364273935.03 | NaN | NaN | NaN | NaN | NaN | NaN | 2.82 | 0.80 | NaN | NaN | 44.23 | 17.59 | 48.33 | 239756352.51 | 153870196.38 | NaN | NaN | 9.83 |
| Albania | 42.15 | 28.50 | 27400.00 | 1485.00 | 23.31 | -0.68 | 0.17 | 99.80 | 40.24 | 1167.18 | 3881.95 | 8614.21 | 99.59 | 15.06 | 2.44 | 6.20 | 0.12 | 66.81 | 16.43 | 24.20 | -0.31 | 0.23 | 0.61 | 82.00 | 14.77 | 71.14 | 25.38 | 2.48 | 2.79 | 7480345027.69 | -0.09 | 0.27 | -0.55 | 0.17 | 0.02 | 0.21 | 3.37 | 2.62 | 47.60 | 31.01 | 22.85 | 7.73 | 71.50 | 2713063.16 | 1574154.24 | 0.03 | 0.15 | 6.40 |
| Algeria | 17.45 | 0.74 | 2381740.10 | 89.00 | 6.83 | -0.64 | 0.18 | 99.06 | 0.32 | 599.61 | 105761.24 | 193574.39 | 10.72 | 8.60 | 1.90 | 5.82 | 0.26 | NaN | NaN | NaN | -0.56 | 0.21 | 0.53 | 157.00 | 19.30 | 48.79 | 16.35 | 2.46 | 3.15 | 71794434279.38 | -1.23 | 0.26 | -0.84 | 0.17 | -0.96 | 0.23 | 6.45 | 3.52 | NaN | 34.37 | 33.52 | 10.73 | 61.42 | 25915130.46 | 10791909.30 | -0.98 | 0.14 | 1.17 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| West Bank and Gaza | 79.33 | 1.59 | 6020.00 | 402.00 | 16.51 | -0.26 | 0.28 | 99.54 | 17.25 | NaN | NaN | NaN | 558.20 | 2.96 | 2.84 | NaN | 0.30 | NaN | 8.36 | 12.43 | -0.82 | 0.33 | 0.57 | 117.00 | 16.77 | 67.16 | 27.35 | NaN | NaN | 9172903448.28 | -1.77 | 0.32 | -0.37 | 0.27 | -0.38 | 0.35 | 5.11 | NaN | 24.00 | 34.52 | 37.09 | 4.29 | 71.79 | 3458736.48 | 907983.03 | -0.87 | 0.19 | 1.35 |
| World | 36.36 | 31.78 | 129717210.21 | NaN | 9.53 | NaN | NaN | 83.60 | 17.34 | 2104.54 | 28173346.98 | 38928765.52 | 41.34 | 5.36 | NaN | NaN | 2.06 | NaN | 14.19 | 26.53 | NaN | NaN | NaN | NaN | 31.47 | NaN | 21.50 | 2.87 | 3.47 | 32366182539682.54 | NaN | NaN | NaN | NaN | NaN | NaN | 4.15 | 5.66 | NaN | NaN | 25.52 | 9.89 | 64.60 | 5407739439.19 | 2920233053.79 | NaN | NaN | 6.17 |
| Yemen, Rep. | 44.52 | 1.04 | 527970.00 | 167.00 | 10.41 | -1.22 | 0.18 | 56.89 | 1.37 | 114.12 | 15957.08 | 33225.43 | 29.21 | 17.50 | 7.13 | 4.18 | NaN | NaN | NaN | NaN | -1.33 | 0.22 | 0.37 | 187.00 | 6.75 | 39.09 | 6.75 | 2.40 | 5.63 | 18230152874.03 | -2.11 | 0.26 | -1.37 | 0.17 | -1.00 | 0.21 | 6.49 | 1.56 | NaN | 35.47 | 46.35 | 14.83 | 54.78 | 15833245.87 | 11515379.17 | -1.29 | 0.14 | 4.59 |
| Zambia | 28.79 | 62.69 | 743390.00 | 1020.00 | 11.79 | -0.56 | 0.16 | 24.77 | 86.38 | 843.76 | 3320.69 | 28515.98 | 11.84 | 36.44 | -0.49 | 3.77 | 0.05 | 105.35 | 15.50 | 18.25 | -0.80 | 0.18 | 0.39 | 85.00 | 30.63 | 57.93 | 4.26 | 2.41 | 1.87 | 8035327954.94 | 0.21 | 0.26 | -0.43 | 0.16 | -0.55 | 0.19 | 3.79 | 1.83 | NaN | 52.69 | 45.67 | 13.65 | 52.34 | 9055472.62 | 5573550.30 | -0.29 | 0.13 | 7.39 |
| Zimbabwe | 35.50 | 46.88 | 386850.00 | 657.58 | 9.63 | -1.23 | 0.16 | 37.57 | 75.06 | 842.78 | 12378.40 | 29168.66 | 24.68 | 79.61 | -20.57 | NaN | NaN | NaN | 14.83 | 18.91 | -1.17 | 0.18 | 0.44 | 140.00 | 7.00 | 53.54 | 8.33 | 2.31 | 3.25 | 8333485469.06 | -0.95 | 0.26 | -1.50 | 0.15 | -1.70 | 0.18 | 9.08 | 1.75 | NaN | 45.93 | 39.59 | 11.92 | 54.74 | 9667335.21 | 6857547.03 | -1.30 | 0.13 | 8.34 |
268 rows × 48 columns
## Applying Log transformation on data while plotting box plots
for feature in df.columns:
data=df.copy()
if 0 in data[feature].unique():
pass
else:
# Generate some sample data (replace this with your actual data)
data1 = data[feature]
data2 = np.log(data[feature])
# Create a figure and a set of subplots
fig, axs = plt.subplots(1, 2, figsize=(10, 5))
# Create the first box plot
axs[0].boxplot(data1)
axs[0].set_title(f'{feature} boxplot')
# Create the second box plot
axs[1].boxplot(data2)
axs[1].set_title(f'{feature} boxplot with logarithmic transformation')
# Show the plots
plt.tight_layout()
plt.show()
C:\ProgramData\Anaconda3\lib\site-packages\pandas\core\arraylike.py:397: RuntimeWarning: invalid value encountered in log result = getattr(ufunc, method)(*inputs, **kwargs)
C:\ProgramData\Anaconda3\lib\site-packages\pandas\core\arraylike.py:397: RuntimeWarning: invalid value encountered in log result = getattr(ufunc, method)(*inputs, **kwargs)
C:\ProgramData\Anaconda3\lib\site-packages\pandas\core\arraylike.py:397: RuntimeWarning: invalid value encountered in log result = getattr(ufunc, method)(*inputs, **kwargs)
C:\ProgramData\Anaconda3\lib\site-packages\pandas\core\arraylike.py:397: RuntimeWarning: invalid value encountered in log result = getattr(ufunc, method)(*inputs, **kwargs)
C:\ProgramData\Anaconda3\lib\site-packages\pandas\core\arraylike.py:397: RuntimeWarning: invalid value encountered in log result = getattr(ufunc, method)(*inputs, **kwargs)
C:\ProgramData\Anaconda3\lib\site-packages\pandas\core\arraylike.py:397: RuntimeWarning: invalid value encountered in log result = getattr(ufunc, method)(*inputs, **kwargs)
C:\ProgramData\Anaconda3\lib\site-packages\pandas\core\arraylike.py:397: RuntimeWarning: invalid value encountered in log result = getattr(ufunc, method)(*inputs, **kwargs)
C:\ProgramData\Anaconda3\lib\site-packages\pandas\core\arraylike.py:397: RuntimeWarning: invalid value encountered in log result = getattr(ufunc, method)(*inputs, **kwargs)